String manipulation with the stringr package

packages
Author

Tony Duan

Published

July 11, 2023

1 input data

Code
library(tidyverse)
library(stringr)
Code
# Work on a copy of the built-in mtcars data set so the original stays untouched.
data001 <- mtcars
head(data001)
                   mpg cyl disp  hp drat    wt  qsec vs am gear carb
Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

Turn the row names into a new column

Code
# Copy the row names (the car models) into a regular first column called
# `names`, then drop the row names so the rows are numbered 1..n.
data001 <- cbind(names = row.names(data001), data001)
row.names(data001) <- NULL
head(data001)
              names  mpg cyl disp  hp drat    wt  qsec vs am gear carb
1         Mazda RX4 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
2     Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
3        Datsun 710 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
4    Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
5 Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
6           Valiant 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

2 length of string with str_length()

Code
# Count the characters in the string ("abc" has 3).
str_length("abc")
[1] 3

3 replace with str_replace() and str_replace_all()

Code
# Sample string with two "b" characters, used by the replacement examples.
text001 <- "abcb"
Code
# Replace only the first match of the pattern.
str_replace(text001, pattern = "b", replacement = "1")
[1] "a1cb"
Code
# Replace every match of the pattern.
str_replace_all(text001, pattern = "b", replacement = "1")
[1] "a1c1"

4 subset string by position with str_sub()

Code
# Keep characters 2 through 4 (inclusive) of each car name in a new column.
data001$new_names <- str_sub(data001$names, start = 2, end = 4)

# Show the derived column next to the original names for comparison.
data001 %>%
  select(new_names, names) %>%
  head()
  new_names             names
1       azd         Mazda RX4
2       azd     Mazda RX4 Wag
3       ats        Datsun 710
4       orn    Hornet 4 Drive
5       orn Hornet Sportabout
6       ali           Valiant

5 handle case

Code
# Sample sentence used by the case-conversion examples below.
x <- "I like horses."

str_to_upper()

Code
# Convert every letter to upper case.
str_to_upper(x)
[1] "I LIKE HORSES."

str_to_lower()

Code
# Convert every letter to lower case.
str_to_lower(x)
[1] "i like horses."

str_to_title()

Code
# Capitalise the first letter of each word.
str_to_title(x)
[1] "I Like Horses."

6 pad strings to the same length with str_pad()

Code
# Pad each car name with spaces on both sides so every value is 20 characters
# wide. Note: this overwrites the `new_names` column created in section 4.
data001$new_names <- str_pad(data001$names, width = 20, side = "both")
head(data001$new_names)
[1] "     Mazda RX4      " "   Mazda RX4 Wag    " "     Datsun 710     "
[4] "   Hornet 4 Drive   " " Hornet Sportabout  " "      Valiant       "

7 extracting number from a string

Code
library(stringr)

trx <- "abc1993 ccc"

# Extract the first run of consecutive digits. The result is still a
# character string ("1993"), not a numeric value.
num <- str_extract(trx, "(\\d)+")

num
[1] "1993"

8 Reference

https://stringr.tidyverse.org/articles/stringr.html